home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
9-Digit Zip Code Directory
/
9-Digit Zip Code Directory (American Business Information) (ABIZIP-12).ISO
/
z4src.zip
/
CPRECID.C
< prev
next >
Wrap
C/C++ Source or Header
|
1993-07-06
|
13KB
|
486 lines
//----------------------------------------------------------------------------
// MODULE DESCRIPTION
//
// Module: cprecid.c
// Title: Data Compression Library
// Notice: John M. Weeder
// Copyright (c) 1993. All rights reserved.
// This module contains proprietary information and should be
// treated as confidential.
//
//----------------------------------------------------------------------------
// MAINTENANCE HISTORY
//
// $Workfile$
// $Revision$
// $Author$
// $Date$
// $Log$
//
//----------------------------------------------------------------------------
// MODULE NARRATIVE
//
//
// This module contains compress and expand blocks of record ids.
//
// Size or terminator information is not stored in the compressed data.
//
// The size of the compressed data will NEVER be larger than the uncompressed
// data. However, as a worst case, it may be the same size.
//
// Because the block size can be up to 8K, there obviously can not be more
// than 8K records in a block. Therefore, the three bits of a record
// id offset are always unused.
//
// Data is read as a byte stream:
// At the start of compression, the current block and offset are assumed
// passed by the caller.
// If the high bit of the byte is clear. This is a simple 0..127 offset
// byte. The next record is in the same block at the given delta from the
// current record.
// If the high bit is set, read the next byte. The upper 2 bits of this
// byte are flag bits. Save them and combine the least 7 bits of the
// first byte with the least 6 bits of the second byte to form a full 13
// bit record offset. This offset may wrap within a block. Wrapping
// always takes place assuming an 8K max block size.
// An offset is 0 based if the block id was just changed or if this is
// the first record.
// The two flag bits are used as follows:
// BIT 0: A range of records is followed. The next 1/2 byte is a record
// offset which specifies the number of records in the range.
// A range always has at LEAST 3 records so the range count is
// actually 'actual range length - 3'.
// BIT 1: A new record block id delta follows. The block id delta is
// variable length and actually has an effective range of
// +/- 2 ^ 28.
// The two flags bits are NOT mutually exclusive.
//
//
// The code in this module should be written entirely in C.
// Do not use any C++ constructs.
//
// This module is portable to:
// DOS 3.X+
// MS Windows 3.X+
// OS/2 2.X+
// OS/2 2.0 PM
// SCO UNIX.
//
// The following compilers are supported:
// MSC 6.0A
// MSC/C++ 7.0
// Borland C++ 3.1 for DOS
// Borland C++ 1.0 for OS/2 2.X
// SCO UNIX cc
//
//----------------------------------------------------------------------------
#include <cp.h>
//----------------------------------------------------------------------------
// Description: Sort routine used by qsort to sort an array of record ids.
// Parameters: see qsort()
// Returns: see qsort()
//----------------------------------------------------------------------------
static int __recidsort__(const void *pv1, const void *pv2)
{
PRECID precid1 = (PRECID)pv1;
PRECID precid2 = (PRECID)pv2;
if (precid1->lBlock > precid2->lBlock)
return 1;
else if (precid1->lBlock < precid2->lBlock)
return -1;
else
{
if (precid1->usOffset > precid2->usOffset)
return 1;
else if (precid1->usOffset < precid2->usOffset)
return -1;
}
return 0;
}
//----------------------------------------------------------------------------
// Description: Decode a compressed array of record ids.
// Parameters: precid Pointer to buffer for record ids.
// cRec Size of record id buffer.
// pb Compressed data.
// cb Size of compressed data.
// pcb Variable to recieve number of record ids.
// Returns: TRUE if successful.
//----------------------------------------------------------------------------
BOOL FN_E RecIdDecode(PRECID precid, SIZET cRecs, PBYTE pb, SIZET cb, PSIZET pcb, PRECID precidBase)
{
RECID recid;
BYTE b1, b2, b3, b4;
USHORT us, usOffset, usDelta;
LONG lDelta;
SIZET cDecode;
BOOL fBlockChange, fRange;
Assert(precid && cRecs); // Validate parameters
Assert(pb && cb);
Assert(pcb);
cDecode = 0; // Initialize variables
*pcb = cDecode;
recid.lBlock = 0; // Read initial record id
recid.usOffset = 0;
if (precidBase)
recid = *precidBase;
while (cb) // While there is still data left
{ // to decode
b1 = *pb++; // Read byte
cb--;
if (b1 & 0x80) // If another byte is continued, read it
{
if (!cb)
return FALSE;
b2 = *pb++;
cb--;
}
else
b2 = 0;
// Compute offset
usOffset = (USHORT)((((USHORT)b2 & 0x007F) << 6) + ((USHORT)b1 & 0x003F));
fBlockChange = (b1 & 0x40) != 0;
fRange = (b2 & 0x80) != 0;
if (fBlockChange) // Block change
{
recid.usOffset = 0;
if (!cb)
return FALSE;
b1 = *pb++;
b2 = 0;
b3 = 0;
b4 = 0;
cb--;
if (b1 & 0x80)
{
if (!cb)
return FALSE;
b2 = *pb++;
cb--;
if (b2 & 0x80)
{
if (!cb)
return FALSE;
b3 = *pb++;
cb--;
if (b3 & 0x80)
{
if (!cb)
return FALSE;
b4 = *pb++;
cb--;
}
}
}
lDelta = ((LONG)b4 << 20) + ((LONG)(b3 & 0x7F) << 13)
+ ((LONG)(b2 & 0x7F) << 6) + (LONG)(b1 & 0x3F);
if (b1 & 0x40)
lDelta = -lDelta;
recid.lBlock += lDelta; // Read new block and reset offset
}
if (fRange) // Block range specifier
{
if (!cb) // Read byte
return FALSE;
b1 = *pb++;
cb--;
if (b1 & 0x80) // Read continuation if specified
{
if (!cb)
return FALSE;
b2 = *pb++;
cb--;
}
else
b2 = 0;
// Calculate # of records in range
usDelta = (USHORT)(((((USHORT)b2 & 0x003F) << 7) + ((USHORT)b1 &0x007F)) + 3);
}
else
usDelta = 1; // Else a single record
recid.usOffset += usOffset; // Build expanded record id list
if (recid.usOffset >= 8192)
recid.usOffset -= 8192;
for (us = 0; us < usDelta; us++)
{
if (!cRecs) // No more space for record ids
return FALSE;
if (us)
recid.usOffset++;
*precid = recid;
precid++;
cRecs--;
cDecode++;
}
}
if (precidBase)
*precidBase = recid;
*pcb = cDecode; // Return number of decoded records
return TRUE;
}
//----------------------------------------------------------------------------
// Description: Compress an array of record ids.
// Parameters: precid Array of record ids
// cRec Number of record ids.
// pb Buffer for compressed data
// cb Size of compressed data buffer.
// pcb Variable to recieve size of compressed data.
// Record id list is sorted an de-dup'ed at exit!
// Returns: TRUE if successful.
//----------------------------------------------------------------------------
BOOL FN_E RecIdEncode(PRECID precid, SIZET cRecs, PBYTE pb, SIZET cb, PSIZET pcb, PRECID precidBase)
{
SIZET i, j;
PRECID precid1, precid2;
RECID recid;
BYTE b1, b2, b3, b4;
LONG lDelta;
USHORT usDelta;
SIZET cEncode;
BOOL fBlockChange;
Assert(precid && cRecs); // Validate parameters
Assert(pb && cb);
Assert(pcb);
cEncode = 0; // Initialize variables
*pcb = cEncode;
recid.lBlock = 0; // Get user defined starting recid
recid.usOffset = 0;
if (precidBase)
recid = *precidBase;
// Sort records
qsort(precid, cRecs, sizeof(RECID), __recidsort__);
precid1 = precid; // Remove duplicate records
precid2 = precid1 + 1;
j = cRecs;
for (i = 1; i < j; i++, precid2++)
{ // Skip duplicate record
if (precid1 != precid2
&& precid1->lBlock == precid2->lBlock
&& precid1->usOffset == precid2->usOffset)
{
cRecs--;
}
else // Copy unique records
{
precid1++;
if (precid1 != precid2)
*precid1 = *precid2;
}
}
while (cRecs) // While records to compress, keep working
{
for (i = 1; i < cRecs; ++i) // Search for a range of records
if (precid[i-1].lBlock != precid[i].lBlock
|| precid[i-1].usOffset + 1 != precid[i].usOffset)
break;
// Check if block is changing
fBlockChange = (precid[0].lBlock != recid.lBlock);
if (fBlockChange) // Reset starting offset on block change
{
recid.usOffset = 0;
lDelta = precid[0].lBlock - recid.lBlock;
}
// Calculate delta MOD 8192
if (recid.usOffset > precid[0].usOffset)
usDelta = (USHORT)(8192 - (recid.usOffset - precid[0].usOffset));
else
usDelta = (USHORT)(precid[0].usOffset - recid.usOffset);
Assert(usDelta < 8192);
b1 = (BYTE)(usDelta & 0x003F); // Break into bytes
b2 = (BYTE)((usDelta >> 6) & 0x007F);
if (i >= 3) // Record range
b2 |= 0x80;
if (fBlockChange) // Block id has changed
b1 |= 0x40;
if (b2) // Set continuation bit if second
b1 |= 0x80; // byte is non-zero
if (!cb) // Output offset
return FALSE;
*pb++ = b1;
cb--;
cEncode++;
if (b1 & 0x80) // Output continuation
{
if (!cb)
return FALSE;
*pb++ = b2;
cb--;
cEncode++;
} // Output block change
if (fBlockChange)
{
BOOL fMinus;
if (lDelta < 0)
{
lDelta = -lDelta;
fMinus = TRUE;
}
else
fMinus = FALSE;
Assert((lDelta & 0x80000000L) == 0);
b1 = (BYTE)(lDelta & 0x3F);
b2 = (BYTE)((lDelta >> 6) & 0x7F);
b3 = (BYTE)((lDelta >> 13) & 0x7F);
b4 = (BYTE)((lDelta >> 20) & 0xFF);
if (b4)
b3 |= 0x80;
if (b3)
b2 |= 0x80;
if (b2)
b1 |= 0x80;
if (fMinus)
b1 |= 0x40;
if (!cb)
return FALSE;
*pb++ = b1;
cb--;
cEncode++;
if (b2)
{
if (!cb)
return FALSE;
*pb++ = b2;
cb--;
cEncode++;
}
if (b3)
{
if (!cb)
return FALSE;
*pb++ = b3;
cb--;
cEncode++;
}
if (b4)
{
if (!cb)
return FALSE;
*pb++ = b3;
cb--;
cEncode++;
}
}
if (i >= 3) // Output range if specified
{
usDelta = (USHORT)(i - 3);
b1 = (BYTE)(usDelta & 0x007F);
b2 = (BYTE)((usDelta >> 7) & 0x003F);
if (!cb)
return FALSE;
*pb++ = b1;
cb--;
cEncode++;
if (b1 & 0x80)
{
if (!cb)
return FALSE;
*pb++ = b2;
cb--;
cEncode++;
}
}
else
i = 1;
cRecs -= i; // Move pointers
precid += i;
recid = *(precid - 1); // Store a copy of the last record id
}
if (precidBase)
*precidBase = recid;
*pcb = cEncode;
return TRUE;
}
//----------------------------------------------------------------------------
// Description: Run standard test suite
// Parameters: sTest Test to run.
// 0 Run all default tests (except).
// Returns: TRUE if successful.
//----------------------------------------------------------------------------
#if COMPILE_TEST
BOOL FN RecIdTest(SHORT sTest)
{
static RECID arecid[][20] =
{
{{ 0, 0},{ 0, 1},{ 0, 3},{ 1, 0},{ 2, 0},{ 3, 0},{ 4, 0},{-1, -1}},
{{ 0, 0},{ 1, 1},{ 1, 2},{ 1, 3},{ 2, 0},{ 2, 1},{ 2, 2},{ 2, 3},{-1, -1}},
{{ 0, 1},{ 0, 0},{ 0, 0},{ 2, 0},{ 2, 0},{ 2, 0},{ 3, 0},{-1, -1}},
{{ 0, 0},{ 0, 127},{ 0, 128},{ 0, 1024},{-1, -1}},
{{ 0, 10},{-1, -1}},
{{ 0, 9},{-1, -1}},
{{ 0, 9},{-1, -1}},
{{ 1, 0},{-1, -1}},
{{ 9999998L, 0},{-1, -1}},
{{ 9999999L, 0},{-1, -1}},
{{ 1111111L, 0},{-1, -1}},
{{ 1111110L, 0},{-1, -1}},
{{-1, -1}}
};
static RECID arecidDecode[100];
static BYTE bBuf[4 * 100];
SIZET cb;
SIZET cRecs;
SIZET i, j, k;
RECID recid1, recid2;
NOTUSED(sTest);
recid1.lBlock = 0;
recid1.usOffset = 0;
recid2.lBlock = 0;
recid2.usOffset = 0;
for (i = 0; arecid[i][0].lBlock != -1; ++i)
{
for (j = 0; arecid[i][j].lBlock != -1; ++j)
;
if (!RecIdEncode(&arecid[i][0], j, bBuf, sizeof(bBuf), &cb, &recid1))
return FALSE;
Output("Record Set # %u\n", i+1);
Output(" %2d input records encoded into %u bytes.\n", j, cb);
// Count after de-dup
for (k = 0; arecid[i][k].lBlock != -1; ++k)
if (k > 0
&& (arecid[i][k].lBlock < arecid[i][k - 1].lBlock
|| (arecid[i][k].lBlock == arecid[i][k - 1].lBlock
&& arecid[i][k].usOffset <= arecid[i][k - 1].usOffset)))
break;
if (!RecIdDecode(&arecidDecode[0], 100, bBuf, cb, &cRecs, &recid2))
return FALSE;
Output(" %2d output records.\n", cRecs);
if (cRecs != k // Check data
|| memcmp(arecid[i], arecidDecode, cRecs) != 0)
return FALSE;
}
return TRUE;
}
#endif
//----------------------------------------------------------------------------
//------------------------------- End of File --------------------------------
//----------------------------------------------------------------------------